{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.impute import SimpleImputer\n",
    "\n",
    "from feature_engine.wrappers import SklearnTransformerWrapper"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Id</th>\n",
       "      <th>MSSubClass</th>\n",
       "      <th>MSZoning</th>\n",
       "      <th>LotFrontage</th>\n",
       "      <th>LotArea</th>\n",
       "      <th>Street</th>\n",
       "      <th>Alley</th>\n",
       "      <th>LotShape</th>\n",
       "      <th>LandContour</th>\n",
       "      <th>Utilities</th>\n",
       "      <th>...</th>\n",
       "      <th>PoolArea</th>\n",
       "      <th>PoolQC</th>\n",
       "      <th>Fence</th>\n",
       "      <th>MiscFeature</th>\n",
       "      <th>MiscVal</th>\n",
       "      <th>MoSold</th>\n",
       "      <th>YrSold</th>\n",
       "      <th>SaleType</th>\n",
       "      <th>SaleCondition</th>\n",
       "      <th>SalePrice</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>60</td>\n",
       "      <td>RL</td>\n",
       "      <td>65.0</td>\n",
       "      <td>8450</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Reg</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2008</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "      <td>208500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>20</td>\n",
       "      <td>RL</td>\n",
       "      <td>80.0</td>\n",
       "      <td>9600</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Reg</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>2007</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "      <td>181500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>60</td>\n",
       "      <td>RL</td>\n",
       "      <td>68.0</td>\n",
       "      <td>11250</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>IR1</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>9</td>\n",
       "      <td>2008</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "      <td>223500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>70</td>\n",
       "      <td>RL</td>\n",
       "      <td>60.0</td>\n",
       "      <td>9550</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>IR1</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2006</td>\n",
       "      <td>WD</td>\n",
       "      <td>Abnorml</td>\n",
       "      <td>140000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>60</td>\n",
       "      <td>RL</td>\n",
       "      <td>84.0</td>\n",
       "      <td>14260</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>IR1</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>12</td>\n",
       "      <td>2008</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "      <td>250000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 81 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   Id  MSSubClass MSZoning  LotFrontage  LotArea Street Alley LotShape  \\\n",
       "0   1          60       RL         65.0     8450   Pave   NaN      Reg   \n",
       "1   2          20       RL         80.0     9600   Pave   NaN      Reg   \n",
       "2   3          60       RL         68.0    11250   Pave   NaN      IR1   \n",
       "3   4          70       RL         60.0     9550   Pave   NaN      IR1   \n",
       "4   5          60       RL         84.0    14260   Pave   NaN      IR1   \n",
       "\n",
       "  LandContour Utilities  ... PoolArea PoolQC Fence MiscFeature MiscVal MoSold  \\\n",
       "0         Lvl    AllPub  ...        0    NaN   NaN         NaN       0      2   \n",
       "1         Lvl    AllPub  ...        0    NaN   NaN         NaN       0      5   \n",
       "2         Lvl    AllPub  ...        0    NaN   NaN         NaN       0      9   \n",
       "3         Lvl    AllPub  ...        0    NaN   NaN         NaN       0      2   \n",
       "4         Lvl    AllPub  ...        0    NaN   NaN         NaN       0     12   \n",
       "\n",
       "  YrSold  SaleType  SaleCondition  SalePrice  \n",
       "0   2008        WD         Normal     208500  \n",
       "1   2007        WD         Normal     181500  \n",
       "2   2008        WD         Normal     223500  \n",
       "3   2006        WD        Abnorml     140000  \n",
       "4   2008        WD         Normal     250000  \n",
       "\n",
       "[5 rows x 81 columns]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# load house prices data set from Kaggle\n",
    "\n",
    "data = pd.read_csv('houseprice.csv')\n",
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((1022, 79), (438, 79))"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# let's separate into training and testing set\n",
    "\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    data.drop(['Id', 'SalePrice'], axis=1),\n",
    "    data['SalePrice'],\n",
    "    test_size=0.3,\n",
    "    random_state=0)\n",
    "\n",
    "X_train.shape, X_test.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "LotFrontage    0.184932\n",
       "MasVnrArea     0.004892\n",
       "dtype: float64"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_train[['LotFrontage', 'MasVnrArea']].isnull().mean()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## SimpleImputer\n",
    "\n",
    "### Mean imputation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "SklearnTransformerWrapper(transformer=SimpleImputer(),\n",
       "                          variables=['LotFrontage', 'MasVnrArea'])"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "imputer = SklearnTransformerWrapper(\n",
    "    transformer = SimpleImputer(strategy='mean'),\n",
    "    variables = ['LotFrontage', 'MasVnrArea'],\n",
    ")\n",
    "\n",
    "imputer.fit(X_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 69.66866747, 103.55358899])"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# we can find the mean values within the parameters of the\n",
    "# simple imputer\n",
    "\n",
    "imputer.transformer_.statistics_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "LotFrontage    0.0\n",
       "MasVnrArea     0.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# remove NA\n",
    "\n",
    "X_train = imputer.transform(X_train)\n",
    "X_test = imputer.transform(X_test)\n",
    "\n",
    "X_train[['LotFrontage', 'MasVnrArea']].isnull().mean()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Frequent category imputation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Alley</th>\n",
       "      <th>MasVnrType</th>\n",
       "      <th>BsmtQual</th>\n",
       "      <th>BsmtCond</th>\n",
       "      <th>BsmtExposure</th>\n",
       "      <th>BsmtFinType1</th>\n",
       "      <th>BsmtFinType2</th>\n",
       "      <th>Electrical</th>\n",
       "      <th>FireplaceQu</th>\n",
       "      <th>GarageType</th>\n",
       "      <th>GarageFinish</th>\n",
       "      <th>GarageQual</th>\n",
       "      <th>GarageCond</th>\n",
       "      <th>PoolQC</th>\n",
       "      <th>Fence</th>\n",
       "      <th>MiscFeature</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>BrkFace</td>\n",
       "      <td>Gd</td>\n",
       "      <td>TA</td>\n",
       "      <td>No</td>\n",
       "      <td>GLQ</td>\n",
       "      <td>Unf</td>\n",
       "      <td>SBrkr</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Attchd</td>\n",
       "      <td>RFn</td>\n",
       "      <td>TA</td>\n",
       "      <td>TA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>NaN</td>\n",
       "      <td>None</td>\n",
       "      <td>Gd</td>\n",
       "      <td>TA</td>\n",
       "      <td>Gd</td>\n",
       "      <td>ALQ</td>\n",
       "      <td>Unf</td>\n",
       "      <td>SBrkr</td>\n",
       "      <td>TA</td>\n",
       "      <td>Attchd</td>\n",
       "      <td>RFn</td>\n",
       "      <td>TA</td>\n",
       "      <td>TA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>BrkFace</td>\n",
       "      <td>Gd</td>\n",
       "      <td>TA</td>\n",
       "      <td>Mn</td>\n",
       "      <td>GLQ</td>\n",
       "      <td>Unf</td>\n",
       "      <td>SBrkr</td>\n",
       "      <td>TA</td>\n",
       "      <td>Attchd</td>\n",
       "      <td>RFn</td>\n",
       "      <td>TA</td>\n",
       "      <td>TA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>NaN</td>\n",
       "      <td>None</td>\n",
       "      <td>TA</td>\n",
       "      <td>Gd</td>\n",
       "      <td>No</td>\n",
       "      <td>ALQ</td>\n",
       "      <td>Unf</td>\n",
       "      <td>SBrkr</td>\n",
       "      <td>Gd</td>\n",
       "      <td>Detchd</td>\n",
       "      <td>Unf</td>\n",
       "      <td>TA</td>\n",
       "      <td>TA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>NaN</td>\n",
       "      <td>BrkFace</td>\n",
       "      <td>Gd</td>\n",
       "      <td>TA</td>\n",
       "      <td>Av</td>\n",
       "      <td>GLQ</td>\n",
       "      <td>Unf</td>\n",
       "      <td>SBrkr</td>\n",
       "      <td>TA</td>\n",
       "      <td>Attchd</td>\n",
       "      <td>RFn</td>\n",
       "      <td>TA</td>\n",
       "      <td>TA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Alley MasVnrType BsmtQual BsmtCond BsmtExposure BsmtFinType1 BsmtFinType2  \\\n",
       "0   NaN    BrkFace       Gd       TA           No          GLQ          Unf   \n",
       "1   NaN       None       Gd       TA           Gd          ALQ          Unf   \n",
       "2   NaN    BrkFace       Gd       TA           Mn          GLQ          Unf   \n",
       "3   NaN       None       TA       Gd           No          ALQ          Unf   \n",
       "4   NaN    BrkFace       Gd       TA           Av          GLQ          Unf   \n",
       "\n",
       "  Electrical FireplaceQu GarageType GarageFinish GarageQual GarageCond PoolQC  \\\n",
       "0      SBrkr         NaN     Attchd          RFn         TA         TA    NaN   \n",
       "1      SBrkr          TA     Attchd          RFn         TA         TA    NaN   \n",
       "2      SBrkr          TA     Attchd          RFn         TA         TA    NaN   \n",
       "3      SBrkr          Gd     Detchd          Unf         TA         TA    NaN   \n",
       "4      SBrkr          TA     Attchd          RFn         TA         TA    NaN   \n",
       "\n",
       "  Fence MiscFeature  \n",
       "0   NaN         NaN  \n",
       "1   NaN         NaN  \n",
       "2   NaN         NaN  \n",
       "3   NaN         NaN  \n",
       "4   NaN         NaN  "
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cols = [c for c in data.columns if data[c].dtypes=='O' and data[c].isnull().sum()>0]\n",
    "data[cols].head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "SklearnTransformerWrapper(transformer=SimpleImputer(strategy='most_frequent'),\n",
       "                          variables=['Alley', 'MasVnrType', 'BsmtQual',\n",
       "                                     'BsmtCond', 'BsmtExposure', 'BsmtFinType1',\n",
       "                                     'BsmtFinType2', 'Electrical',\n",
       "                                     'FireplaceQu', 'GarageType',\n",
       "                                     'GarageFinish', 'GarageQual', 'GarageCond',\n",
       "                                     'PoolQC', 'Fence', 'MiscFeature'])"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "imputer = SklearnTransformerWrapper(\n",
    "    transformer=SimpleImputer(strategy='most_frequent'),\n",
    "    variables=cols,\n",
    ")\n",
    "\n",
    "# find the most frequent category\n",
    "imputer.fit(X_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['Pave', 'None', 'TA', 'TA', 'No', 'Unf', 'Unf', 'SBrkr', 'Gd',\n",
       "       'Attchd', 'Unf', 'TA', 'TA', 'Gd', 'MnPrv', 'Shed'], dtype=object)"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# we can find the most frequent values within the parameters of the\n",
    "# simple imputer\n",
    "\n",
    "imputer.transformer_.statistics_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Alley           0.0\n",
       "MasVnrType      0.0\n",
       "BsmtQual        0.0\n",
       "BsmtCond        0.0\n",
       "BsmtExposure    0.0\n",
       "BsmtFinType1    0.0\n",
       "BsmtFinType2    0.0\n",
       "Electrical      0.0\n",
       "FireplaceQu     0.0\n",
       "GarageType      0.0\n",
       "GarageFinish    0.0\n",
       "GarageQual      0.0\n",
       "GarageCond      0.0\n",
       "PoolQC          0.0\n",
       "Fence           0.0\n",
       "MiscFeature     0.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# remove NA\n",
    "\n",
    "X_train = imputer.transform(X_train)\n",
    "X_test = imputer.transform(X_test)\n",
    "\n",
    "X_train[cols].isnull().mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Alley</th>\n",
       "      <th>MasVnrType</th>\n",
       "      <th>BsmtQual</th>\n",
       "      <th>BsmtCond</th>\n",
       "      <th>BsmtExposure</th>\n",
       "      <th>BsmtFinType1</th>\n",
       "      <th>BsmtFinType2</th>\n",
       "      <th>Electrical</th>\n",
       "      <th>FireplaceQu</th>\n",
       "      <th>GarageType</th>\n",
       "      <th>GarageFinish</th>\n",
       "      <th>GarageQual</th>\n",
       "      <th>GarageCond</th>\n",
       "      <th>PoolQC</th>\n",
       "      <th>Fence</th>\n",
       "      <th>MiscFeature</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>529</th>\n",
       "      <td>Pave</td>\n",
       "      <td>None</td>\n",
       "      <td>TA</td>\n",
       "      <td>TA</td>\n",
       "      <td>No</td>\n",
       "      <td>Rec</td>\n",
       "      <td>Unf</td>\n",
       "      <td>SBrkr</td>\n",
       "      <td>TA</td>\n",
       "      <td>Attchd</td>\n",
       "      <td>RFn</td>\n",
       "      <td>TA</td>\n",
       "      <td>TA</td>\n",
       "      <td>Gd</td>\n",
       "      <td>MnPrv</td>\n",
       "      <td>Shed</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>491</th>\n",
       "      <td>Pave</td>\n",
       "      <td>None</td>\n",
       "      <td>TA</td>\n",
       "      <td>TA</td>\n",
       "      <td>No</td>\n",
       "      <td>BLQ</td>\n",
       "      <td>Rec</td>\n",
       "      <td>FuseA</td>\n",
       "      <td>TA</td>\n",
       "      <td>Attchd</td>\n",
       "      <td>Unf</td>\n",
       "      <td>TA</td>\n",
       "      <td>TA</td>\n",
       "      <td>Gd</td>\n",
       "      <td>MnPrv</td>\n",
       "      <td>Shed</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>459</th>\n",
       "      <td>Pave</td>\n",
       "      <td>BrkCmn</td>\n",
       "      <td>TA</td>\n",
       "      <td>TA</td>\n",
       "      <td>No</td>\n",
       "      <td>LwQ</td>\n",
       "      <td>Unf</td>\n",
       "      <td>SBrkr</td>\n",
       "      <td>TA</td>\n",
       "      <td>Detchd</td>\n",
       "      <td>Unf</td>\n",
       "      <td>TA</td>\n",
       "      <td>TA</td>\n",
       "      <td>Gd</td>\n",
       "      <td>MnPrv</td>\n",
       "      <td>Shed</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>279</th>\n",
       "      <td>Pave</td>\n",
       "      <td>BrkFace</td>\n",
       "      <td>Gd</td>\n",
       "      <td>TA</td>\n",
       "      <td>No</td>\n",
       "      <td>BLQ</td>\n",
       "      <td>Unf</td>\n",
       "      <td>SBrkr</td>\n",
       "      <td>TA</td>\n",
       "      <td>Attchd</td>\n",
       "      <td>Fin</td>\n",
       "      <td>TA</td>\n",
       "      <td>TA</td>\n",
       "      <td>Gd</td>\n",
       "      <td>MnPrv</td>\n",
       "      <td>Shed</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>655</th>\n",
       "      <td>Pave</td>\n",
       "      <td>BrkFace</td>\n",
       "      <td>TA</td>\n",
       "      <td>TA</td>\n",
       "      <td>No</td>\n",
       "      <td>Unf</td>\n",
       "      <td>Unf</td>\n",
       "      <td>SBrkr</td>\n",
       "      <td>Gd</td>\n",
       "      <td>Detchd</td>\n",
       "      <td>Unf</td>\n",
       "      <td>TA</td>\n",
       "      <td>TA</td>\n",
       "      <td>Gd</td>\n",
       "      <td>MnPrv</td>\n",
       "      <td>Shed</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    Alley MasVnrType BsmtQual BsmtCond BsmtExposure BsmtFinType1 BsmtFinType2  \\\n",
       "529  Pave       None       TA       TA           No          Rec          Unf   \n",
       "491  Pave       None       TA       TA           No          BLQ          Rec   \n",
       "459  Pave     BrkCmn       TA       TA           No          LwQ          Unf   \n",
       "279  Pave    BrkFace       Gd       TA           No          BLQ          Unf   \n",
       "655  Pave    BrkFace       TA       TA           No          Unf          Unf   \n",
       "\n",
       "    Electrical FireplaceQu GarageType GarageFinish GarageQual GarageCond  \\\n",
       "529      SBrkr          TA     Attchd          RFn         TA         TA   \n",
       "491      FuseA          TA     Attchd          Unf         TA         TA   \n",
       "459      SBrkr          TA     Detchd          Unf         TA         TA   \n",
       "279      SBrkr          TA     Attchd          Fin         TA         TA   \n",
       "655      SBrkr          Gd     Detchd          Unf         TA         TA   \n",
       "\n",
       "    PoolQC  Fence MiscFeature  \n",
       "529     Gd  MnPrv        Shed  \n",
       "491     Gd  MnPrv        Shed  \n",
       "459     Gd  MnPrv        Shed  \n",
       "279     Gd  MnPrv        Shed  \n",
       "655     Gd  MnPrv        Shed  "
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_test[cols].head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "fenotebook",
   "language": "python",
   "name": "fenotebook"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.2"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
